{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b82ea9b9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie inited_user=2e7ca9e45c86e4ba6450f1cafcda4996; XSRF-TOKEN=-RE1AUCHT5emECCUbOy_xg; __gc_id=eb77943ee1ab4502a8d839c0cec82a67; _ga=GA1.1.234082481.1703470003; __uuid=1703470003225.88; __tlog=1703470003227.34%7C00000000%7C00000000%7C00000000%7C00000000; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1703470003; acw_tc=276077be17034700039008848e44b46d9bff90c2dad7483f266448cacb49bd; UniqueKey=e7684054a0ce036ba9dba665e9526a14; liepin_login_valid=0; lt_auth=s7wJP3MHx1ussXje22RY56dEho%2F7AWrK8S4PhBhU0oXtCvy24P%2FmQAmPrbgD%2FioIqxtzfqkzMLf%2BNuz5yXNO6ksT%2F1GkkIC0uuW52WEBR%2B1cN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E5%88%98%E6%98%8A%E6%B4%8B; need_bind_tel=false; new_user=false; c_flag=a1db9faae25c5345c507ec879e4bf99f; inited_user=2e7ca9e45c86e4ba6450f1cafcda4996; __session_seq=3; __uv_seq=3; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1703470041; imId=f1e6101f1bbdcc6896943371b749fa5c; imId_0=f1e6101f1bbdcc6896943371b749fa5c; imClientId=f1e6101f1bbdcc68cdc92f29a3b1d3ae; imClientId_0=f1e6101f1bbdcc68cdc92f29a3b1d3ae; imApp_0=1; fe_im_socketSequence_new_0=1_0_1; __tlg_event_seq=18; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_e7684054a0ce036ba9dba665e9526a14%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1703470002.1.1.1703470054.0.0.0\n",
      "这是第1页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第11页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第12页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第13页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第14页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第15页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第16页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第17页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第18页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第19页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第20页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第21页，接下来将先等待9秒...然后以继续抓取\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>job.link</th>\n",
       "      <th>job.title</th>\n",
       "      <th>job.salary</th>\n",
       "      <th>job.requireEduLevel</th>\n",
       "      <th>job.jobKind</th>\n",
       "      <th>job.jobId</th>\n",
       "      <th>job.refreshTime</th>\n",
       "      <th>...</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>recruiter.recruiterPhoto</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...</td>\n",
       "      <td>{\"imId\":\"bf25a1674eb4efbc9941166e65ebdb6f\",\"im...</td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.liepin.com/job/1963488473.shtml</td>\n",
       "      <td>6G系统算法工程师</td>\n",
       "      <td>薪资面议</td>\n",
       "      <td>硕士</td>\n",
       "      <td>2</td>\n",
       "      <td>63488473</td>\n",
       "      <td>20231222174738</td>\n",
       "      <td>...</td>\n",
       "      <td>王先生</td>\n",
       "      <td></td>\n",
       "      <td>5f8f986c79c7cc70efbf36c808u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/1337351/</td>\n",
       "      <td>三星电子</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>已上市</td>\n",
       "      <td>6323cefeb997ee0d591cec8003u.png</td>\n",
       "      <td>1337351.0</td>\n",
       "      <td>电子/半导体/集成电路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...</td>\n",
       "      <td>{\"imId\":\"346ab1a87c603986337193710fe42056\",\"im...</td>\n",
       "      <td>[物流监管, 物流规划, 物流跟单]</td>\n",
       "      <td>https://www.liepin.com/job/1963799911.shtml</td>\n",
       "      <td>物流管理主管</td>\n",
       "      <td>薪资面议</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>2</td>\n",
       "      <td>63799911</td>\n",
       "      <td>20231222210311</td>\n",
       "      <td>...</td>\n",
       "      <td>周女士</td>\n",
       "      <td>招聘经理</td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/865813/</td>\n",
       "      <td>中信金属股份有限公司</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5bd81fb98e50fb8f55a86d9804a.png</td>\n",
       "      <td>865813.0</td>\n",
       "      <td>贸易/进出口</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...</td>\n",
       "      <td>{\"imId\":\"05391792270dd5792febfee5850a4afd\",\"im...</td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.liepin.com/job/1963715449.shtml</td>\n",
       "      <td>PD Engineer</td>\n",
       "      <td>30-60k·15薪</td>\n",
       "      <td>硕士</td>\n",
       "      <td>2</td>\n",
       "      <td>63715449</td>\n",
       "      <td>20231219170148</td>\n",
       "      <td>...</td>\n",
       "      <td>周女士</td>\n",
       "      <td>招聘HR</td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/8871522/</td>\n",
       "      <td>Horizon Robotics</td>\n",
       "      <td>2000-5000人</td>\n",
       "      <td>C轮</td>\n",
       "      <td>64780de4f866f04c41650ea606u.png</td>\n",
       "      <td>8871522.0</td>\n",
       "      <td>人工智能</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...</td>\n",
       "      <td>{\"imId\":\"2a12f5a36234ed788b0ca3877d009dea\",\"im...</td>\n",
       "      <td>[python]</td>\n",
       "      <td>https://www.liepin.com/a/51178383.shtml</td>\n",
       "      <td>Python开发工程师</td>\n",
       "      <td>20-30k·20薪</td>\n",
       "      <td>硕士</td>\n",
       "      <td>1</td>\n",
       "      <td>51178383</td>\n",
       "      <td>20231225084044</td>\n",
       "      <td>...</td>\n",
       "      <td>侯先生</td>\n",
       "      <td>猎头顾问</td>\n",
       "      <td>652128ba6fe1f42550af500d06u.jpeg</td>\n",
       "      <td></td>\n",
       "      <td>某大型知名基金/证券/期货公司</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>已上市</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>基金/证券/期货</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...</td>\n",
       "      <td>{\"imId\":\"f335ff30c24adf0671de52c3bc0b6279\",\"im...</td>\n",
       "      <td>[python, django, linux, mysql, 数据库, postgresql...</td>\n",
       "      <td>https://www.liepin.com/job/1936325229.shtml</td>\n",
       "      <td>研发工程师-Python</td>\n",
       "      <td>15-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>2</td>\n",
       "      <td>36325229</td>\n",
       "      <td>20210226175928</td>\n",
       "      <td>...</td>\n",
       "      <td>徐女士</td>\n",
       "      <td>招聘经理/主管</td>\n",
       "      <td>5f8f9865ea60860b75384fa508u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/7883308/</td>\n",
       "      <td>绿盟科技集团股份有限公司</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>创业板上市</td>\n",
       "      <td>5efebfad639c83538664d19c07u.jpg</td>\n",
       "      <td>7883308.0</td>\n",
       "      <td>计算机硬件</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...</td>\n",
       "      <td>{\"imId\":\"25de6c352957c130a3e33bf6eef043a2\",\"im...</td>\n",
       "      <td>[数据清洗]</td>\n",
       "      <td>https://www.liepin.com/a/51687843.shtml</td>\n",
       "      <td>Quant Developer（PYTHON方向）</td>\n",
       "      <td>26-40k·17薪</td>\n",
       "      <td>学历不限</td>\n",
       "      <td>1</td>\n",
       "      <td>51687843</td>\n",
       "      <td>20231225093149</td>\n",
       "      <td>...</td>\n",
       "      <td>王女士</td>\n",
       "      <td>猎头顾问</td>\n",
       "      <td>64642a419234763b3202768804u.png</td>\n",
       "      <td></td>\n",
       "      <td>某知名公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>基金/证券/期货</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...</td>\n",
       "      <td>{\"imId\":\"b6ff78e1f589721d2980490a0cdf2257\",\"im...</td>\n",
       "      <td>[python, linux, java, docker, hadoop, nlp]</td>\n",
       "      <td>https://www.liepin.com/job/1935938073.shtml</td>\n",
       "      <td>AGI-Python工程师（北京/上海）</td>\n",
       "      <td>30-50k·15薪</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>2</td>\n",
       "      <td>35938073</td>\n",
       "      <td>20210527152617</td>\n",
       "      <td>...</td>\n",
       "      <td>肖女士</td>\n",
       "      <td></td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/8652179/</td>\n",
       "      <td>深圳市商汤科技有限公司</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>B轮</td>\n",
       "      <td>5bfea5ff74719d2aa34ceff303a.png</td>\n",
       "      <td>8652179.0</td>\n",
       "      <td>计算机软件</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...</td>\n",
       "      <td>{\"imId\":\"a84400972b17f0be9da65a0dc5ff9f3d\",\"im...</td>\n",
       "      <td>[Celery, MySQL, Kafka, Redis, Python, 后端开发, we...</td>\n",
       "      <td>https://www.liepin.com/job/1954687861.shtml</td>\n",
       "      <td>Python后端工程师</td>\n",
       "      <td>25-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>2</td>\n",
       "      <td>54687861</td>\n",
       "      <td>20230411111419</td>\n",
       "      <td>...</td>\n",
       "      <td>梅女士</td>\n",
       "      <td>助理顾问(AC)</td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/13253053/</td>\n",
       "      <td>北京古灵阁科技有限公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6434d0b8471b9d0d5f00583e01u.png</td>\n",
       "      <td>13253053.0</td>\n",
       "      <td>互联网</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...</td>\n",
       "      <td>{\"imId\":\"a84400972b17f0be9da65a0dc5ff9f3d\",\"im...</td>\n",
       "      <td>[Celery, MySQL, Kafka, Redis, Python, 后端开发, we...</td>\n",
       "      <td>https://www.liepin.com/job/1954686271.shtml</td>\n",
       "      <td>Python工程师</td>\n",
       "      <td>25-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>2</td>\n",
       "      <td>54686271</td>\n",
       "      <td>20230411111419</td>\n",
       "      <td>...</td>\n",
       "      <td>梅女士</td>\n",
       "      <td>助理顾问(AC)</td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/13253053/</td>\n",
       "      <td>北京古灵阁科技有限公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6434d0b8471b9d0d5f00583e01u.png</td>\n",
       "      <td>13253053.0</td>\n",
       "      <td>互联网</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...</td>\n",
       "      <td>{\"imId\":\"54255e28781c0b108759aac61f05331e\",\"im...</td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.liepin.com/job/1956322485.shtml</td>\n",
       "      <td>Python爬虫</td>\n",
       "      <td>1-2k</td>\n",
       "      <td>学历不限</td>\n",
       "      <td>2</td>\n",
       "      <td>56322485</td>\n",
       "      <td>20230925134229</td>\n",
       "      <td>...</td>\n",
       "      <td>赵女士</td>\n",
       "      <td></td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>https://www.liepin.com/company/13425823/</td>\n",
       "      <td>湮潮栗㤥(昌都)有限责任公司</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>13425823.0</td>\n",
       "      <td>科技金融</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>805 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             dataInfo  \\\n",
       "0   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...   \n",
       "1   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...   \n",
       "2   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...   \n",
       "3   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...   \n",
       "4   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobK...   \n",
       "..                                                ...   \n",
       "0   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...   \n",
       "1   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...   \n",
       "2   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...   \n",
       "3   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...   \n",
       "4   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22jobI...   \n",
       "\n",
       "                                           dataParams  \\\n",
       "0   {\"imId\":\"bf25a1674eb4efbc9941166e65ebdb6f\",\"im...   \n",
       "1   {\"imId\":\"346ab1a87c603986337193710fe42056\",\"im...   \n",
       "2   {\"imId\":\"05391792270dd5792febfee5850a4afd\",\"im...   \n",
       "3   {\"imId\":\"2a12f5a36234ed788b0ca3877d009dea\",\"im...   \n",
       "4   {\"imId\":\"f335ff30c24adf0671de52c3bc0b6279\",\"im...   \n",
       "..                                                ...   \n",
       "0   {\"imId\":\"25de6c352957c130a3e33bf6eef043a2\",\"im...   \n",
       "1   {\"imId\":\"b6ff78e1f589721d2980490a0cdf2257\",\"im...   \n",
       "2   {\"imId\":\"a84400972b17f0be9da65a0dc5ff9f3d\",\"im...   \n",
       "3   {\"imId\":\"a84400972b17f0be9da65a0dc5ff9f3d\",\"im...   \n",
       "4   {\"imId\":\"54255e28781c0b108759aac61f05331e\",\"im...   \n",
       "\n",
       "                                           job.labels  \\\n",
       "0                                                  []   \n",
       "1                                  [物流监管, 物流规划, 物流跟单]   \n",
       "2                                                  []   \n",
       "3                                            [python]   \n",
       "4   [python, django, linux, mysql, 数据库, postgresql...   \n",
       "..                                                ...   \n",
       "0                                              [数据清洗]   \n",
       "1          [python, linux, java, docker, hadoop, nlp]   \n",
       "2   [Celery, MySQL, Kafka, Redis, Python, 后端开发, we...   \n",
       "3   [Celery, MySQL, Kafka, Redis, Python, 后端开发, we...   \n",
       "4                                                  []   \n",
       "\n",
       "                                       job.link                  job.title  \\\n",
       "0   https://www.liepin.com/job/1963488473.shtml                  6G系统算法工程师   \n",
       "1   https://www.liepin.com/job/1963799911.shtml                     物流管理主管   \n",
       "2   https://www.liepin.com/job/1963715449.shtml                PD Engineer   \n",
       "3       https://www.liepin.com/a/51178383.shtml                Python开发工程师   \n",
       "4   https://www.liepin.com/job/1936325229.shtml               研发工程师-Python   \n",
       "..                                          ...                        ...   \n",
       "0       https://www.liepin.com/a/51687843.shtml  Quant Developer（PYTHON方向）   \n",
       "1   https://www.liepin.com/job/1935938073.shtml       AGI-Python工程师（北京/上海）   \n",
       "2   https://www.liepin.com/job/1954687861.shtml                Python后端工程师   \n",
       "3   https://www.liepin.com/job/1954686271.shtml                  Python工程师   \n",
       "4   https://www.liepin.com/job/1956322485.shtml                   Python爬虫   \n",
       "\n",
       "    job.salary job.requireEduLevel job.jobKind job.jobId job.refreshTime  ...  \\\n",
       "0         薪资面议                  硕士           2  63488473  20231222174738  ...   \n",
       "1         薪资面议                统招本科           2  63799911  20231222210311  ...   \n",
       "2   30-60k·15薪                  硕士           2  63715449  20231219170148  ...   \n",
       "3   20-30k·20薪                  硕士           1  51178383  20231225084044  ...   \n",
       "4       15-40k                  本科           2  36325229  20210226175928  ...   \n",
       "..         ...                 ...         ...       ...             ...  ...   \n",
       "0   26-40k·17薪                学历不限           1  51687843  20231225093149  ...   \n",
       "1   30-50k·15薪                统招本科           2  35938073  20210527152617  ...   \n",
       "2       25-35k                  本科           2  54687861  20230411111419  ...   \n",
       "3       25-35k                  本科           2  54686271  20230411111419  ...   \n",
       "4         1-2k                学历不限           2  56322485  20230925134229  ...   \n",
       "\n",
       "   recruiter.recruiterName  recruiter.recruiterTitle  \\\n",
       "0                      王先生                             \n",
       "1                      周女士                      招聘经理   \n",
       "2                      周女士                      招聘HR   \n",
       "3                      侯先生                      猎头顾问   \n",
       "4                      徐女士                   招聘经理/主管   \n",
       "..                     ...                       ...   \n",
       "0                      王女士                      猎头顾问   \n",
       "1                      肖女士                             \n",
       "2                      梅女士                  助理顾问(AC)   \n",
       "3                      梅女士                  助理顾问(AC)   \n",
       "4                      赵女士                             \n",
       "\n",
       "            recruiter.recruiterPhoto  \\\n",
       "0    5f8f986c79c7cc70efbf36c808u.jpg   \n",
       "1    5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "2    5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "3   652128ba6fe1f42550af500d06u.jpeg   \n",
       "4    5f8f9865ea60860b75384fa508u.jpg   \n",
       "..                               ...   \n",
       "0    64642a419234763b3202768804u.png   \n",
       "1    5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "2    5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "3    5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "4    5f8f9863f6d1ab58476f246a08u.jpg   \n",
       "\n",
       "                                   comp.link     comp.compName comp.compScale  \\\n",
       "0    https://www.liepin.com/company/1337351/              三星电子       10000人以上   \n",
       "1     https://www.liepin.com/company/865813/        中信金属股份有限公司       100-499人   \n",
       "2    https://www.liepin.com/company/8871522/  Horizon Robotics     2000-5000人   \n",
       "3                                              某大型知名基金/证券/期货公司       10000人以上   \n",
       "4    https://www.liepin.com/company/7883308/      绿盟科技集团股份有限公司     1000-2000人   \n",
       "..                                       ...               ...            ...   \n",
       "0                                                        某知名公司         50-99人   \n",
       "1    https://www.liepin.com/company/8652179/       深圳市商汤科技有限公司       500-999人   \n",
       "2   https://www.liepin.com/company/13253053/       北京古灵阁科技有限公司          1-49人   \n",
       "3   https://www.liepin.com/company/13253053/       北京古灵阁科技有限公司          1-49人   \n",
       "4   https://www.liepin.com/company/13425823/    湮潮栗㤥(昌都)有限责任公司                  \n",
       "\n",
       "   comp.compStage                    comp.compLogo comp.compId  \\\n",
       "0             已上市  6323cefeb997ee0d591cec8003u.png   1337351.0   \n",
       "1             NaN  5bd81fb98e50fb8f55a86d9804a.png    865813.0   \n",
       "2              C轮  64780de4f866f04c41650ea606u.png   8871522.0   \n",
       "3             已上市  60e2fe0bf3df194a3c48adb502u.png         NaN   \n",
       "4           创业板上市  5efebfad639c83538664d19c07u.jpg   7883308.0   \n",
       "..            ...                              ...         ...   \n",
       "0           融资未公开  60e2fe0bf3df194a3c48adb502u.png         NaN   \n",
       "1              B轮  5bfea5ff74719d2aa34ceff303a.png   8652179.0   \n",
       "2             NaN  6434d0b8471b9d0d5f00583e01u.png  13253053.0   \n",
       "3             NaN  6434d0b8471b9d0d5f00583e01u.png  13253053.0   \n",
       "4             NaN  61b07937d0458d53c627567e02u.jpg  13425823.0   \n",
       "\n",
       "    comp.compIndustry  \n",
       "0         电子/半导体/集成电路  \n",
       "1              贸易/进出口  \n",
       "2                人工智能  \n",
       "3            基金/证券/期货  \n",
       "4               计算机硬件  \n",
       "..                ...  \n",
       "0            基金/证券/期货  \n",
       "1               计算机软件  \n",
       "2                 互联网  \n",
       "3                 互联网  \n",
       "4                科技金融  \n",
       "\n",
       "[805 rows x 31 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import crawl_liepin\n",
    "df = crawl_liepin.crawl(城市=\"北京\",关键词=\"python\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "72af1450",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import time\n",
    "\n",
    "def crawl_text_from_link(url):\n",
    "    try:\n",
    "        headers = {\n",
    "            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'\n",
    "        }\n",
    "\n",
    "        response = requests.get(url, headers=headers, timeout=10)\n",
    "        response.raise_for_status()\n",
    "\n",
    "        time.sleep(2)\n",
    "\n",
    "        soup = BeautifulSoup(response.text, 'html.parser')\n",
    "\n",
    "        # 修改这里，使用 'p' 标签\n",
    "        paragraphs = soup.find_all('dd')\n",
    "        text_from_link = '\\n'.join([paragraph.get_text() for paragraph in paragraphs])\n",
    "\n",
    "        return text_from_link\n",
    "\n",
    "    except requests.exceptions.RequestException as e:\n",
    "        print(f\"Error: {e}\")\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2b5cbac1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\anaconda3\\lib\\site-packages\\bs4\\__init__.py:435: MarkupResemblesLocatorWarning: The input looks more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from bs4 import BeautifulSoup  # 确保导入BeautifulSoup\n",
    "\n",
    "技能要求 = []\n",
    "links = df['job.link']\n",
    "\n",
    "for url in links:\n",
    "    text_from_link = crawl_text_from_link(url)\n",
    "    \n",
    "    # 检查字符串是否为空\n",
    "    if text_from_link:\n",
    "        # 将字符串转换为BeautifulSoup对象\n",
    "        soup = BeautifulSoup(text_from_link, 'html.parser')\n",
    "        \n",
    "        # 使用soup对象的text属性获取文本\n",
    "        技能要求.append(soup.text)\n",
    "\n",
    "    else:\n",
    "        # 如果字符串为空，可以添加一个占位符或采取其他适当的操作\n",
    "        技能要求.append(\"No information available\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f38b7c26",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['详细要求']=技能要求"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ff65942f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_excel('北京python.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58bf13ed",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
